/*
*  This file is part of ygg-brute
*  Copyright (c) 2020 ygg-brute authors
*  See LICENSE for licensing information
*/

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <iostream>
#include <list>
#include <memory>
#include <mutex>
#include <random>
#include <string_view>
#include <thread>

#include "third_party/CLI11/CLI11.hpp"

#include "io.hpp"

#include "processor.hpp"
#include "outputter.hpp"

#ifdef CUDA_ENGINE
#   include "cuda/engine.hpp"
#endif

#ifdef OPENCL_ENGINE
#   include "opencl/engine.hpp"
#endif

// Run configuration filled in by main() from the command line and then
// handed to run() / Manager.
struct Options {
    // Number of generator threads; 0 means "let run() pick the default".
    size_t generators = 0;
    // Number of processor (matching) threads; 0 means "let run() pick the default".
    size_t workers = 0;
    // Run time limit in seconds; 0 disables the limit.
    uint64_t time = 0;
    // Address filter handed to every Processor.
    AddressFilter filter;
    // Template parameters for the generators; each generator gets its own
    // copy with a fresh seed/seq.
    GeneratorParams generator_params;
    // Selected compute engine; main() points it at one of the compiled-in engines.
    Engine* engine = nullptr;
};

// Runs the generator -> processor pipeline.
//
// A fixed pool of generator contexts circulates between two queues guarded
// by ctx_mutex_:
//   free_contexts_  -> taken by producer threads, filled via Generator::produce
//   ready_contexts_ -> taken by processor threads, consumed via Processor::process
// Producers wait on produce_cv_ for a free context, processors wait on
// process_cv_ for a ready one. Results of processing go to outputter_.
//
// Threads are started in the constructor and joined by stop(), which is also
// called from the destructor.
class Manager {
public:
    // Allocates (generators + workers) contexts from the engine and starts
    // options.generators producer threads and options.workers processor
    // threads. If anything throws while starting up, the threads that are
    // already running are stopped and joined before the exception propagates;
    // otherwise destroying the joinable std::thread members would terminate
    // the program.
    Manager(const Options& options)
    {
        try {
            const size_t n_contexts = options.generators + options.workers;
            for(size_t i = 0; i < n_contexts; ++i)
                free_contexts_.emplace_back(options.engine->make_generator_context());

            for(size_t i = 0; i < options.generators; ++i) {
                // Every generator gets its own random seed / sequence.
                auto generator_params = options.generator_params;
                generator_params.seed = std::random_device()();
                generator_params.seq = std::random_device()();
                producers_.emplace_back(
                    &Manager::produce,
                    this,
                    options.engine->make_generator(generator_params)
                );
            }

            for(size_t i = 0; i < options.workers; ++i)
                processors_.emplace_back(&Manager::process, this, std::make_shared<Processor>(options.filter));
        } catch(...) {
            stop();
            throw;
        }
    }

    ~Manager() { stop(); }

    // Signals all threads to finish, stops the outputter and joins the
    // threads. Calling it again after it has run is a no-op.
    void stop() {
        {
            // stop_ is only ever touched under ctx_mutex_, including this check.
            std::lock_guard<std::mutex> lock{ctx_mutex_};
            if(stop_) return;
            stop_ = true;
        }

        outputter_.stop();

        // Wake every waiter so it can observe stop_ and return.
        produce_cv_.notify_all();
        process_cv_.notify_all();

        for(auto& p : producers_)
            p.join();
        for(auto& c : processors_)
            c.join();

        producers_.clear();
        processors_.clear();
    }

private:
    // Producer thread body: repeatedly takes a free context, lets the
    // generator fill it (without holding the lock) and queues it as ready.
    // Returns once stop_ is set.
    void produce(std::shared_ptr<Generator> gen)
    {
        auto is_ready = [this] { return stop_ || !free_contexts_.empty(); };

        std::unique_lock<std::mutex> lock{ctx_mutex_};
        for(;;) {
            // The predicate overload checks the condition before blocking.
            produce_cv_.wait(lock, is_ready);

            if(stop_) return;
            auto ctx = std::move(free_contexts_.front());
            free_contexts_.pop_front();

            lock.unlock();

            gen->produce(*ctx);

            lock.lock();
            ready_contexts_.emplace_back(std::move(ctx));
            lock.unlock();
            // Notify without holding the mutex so the woken processor
            // does not immediately block on it.
            process_cv_.notify_one();
            lock.lock();
        }
    }

    // Processor thread body: repeatedly takes a ready context, processes it
    // (without holding the lock), returns the context to the free pool and
    // forwards the results and stats to the outputter. Returns once stop_ is set.
    void process(std::shared_ptr<Processor> processor)
    {
        auto is_ready = [this] { return stop_ || !ready_contexts_.empty(); };

        std::unique_lock<std::mutex> lock{ctx_mutex_};
        for(;;) {
            process_cv_.wait(lock, is_ready);

            if(stop_) return;
            auto ctx = std::move(ready_contexts_.front());
            ready_contexts_.pop_front();
            lock.unlock();

            auto res = processor->process(*ctx);

            lock.lock();
            free_contexts_.emplace_back(std::move(ctx));
            lock.unlock();

            // Let a producer reuse the context before spending time on output.
            produce_cv_.notify_one();
            outputter_.output(std::move(res.results), res.stats);

            lock.lock();
        }
    }

private:
    // Context queues; both are guarded by ctx_mutex_.
    std::list<std::unique_ptr<GeneratorCtx>> free_contexts_;
    std::list<std::unique_ptr<GeneratorCtx>> ready_contexts_;

    std::mutex ctx_mutex_;

    // Signalled when a context becomes free / ready, and on stop.
    std::condition_variable produce_cv_;
    std::condition_variable process_cv_;

    Outputter outputter_;
    std::list<std::thread> producers_;
    std::list<std::thread> processors_;

    // Set once by stop(); guarded by ctx_mutex_.
    bool stop_{false};
};

// Fills in default thread counts, lets the engine complete the generator
// parameters, prints the effective configuration to stderr and then runs the
// Manager until the time limit expires.
//
// options.time == 0 means no time limit: the function then never returns and
// the main thread just sleeps while the Manager's threads do the work.
// Exceptions from the engine or from Manager start-up propagate to the caller.
static void run(Options options)
{
    using clock_t = std::chrono::steady_clock;

    if(!options.workers) {
        // hardware_concurrency() may return 0 when the value is not
        // computable; always keep at least one worker so that ready
        // contexts are actually consumed.
        options.workers = std::max(1u, std::min(std::thread::hardware_concurrency(), 2u));
    }
    if(!options.generators)
        options.generators = 2;

    options.engine->fill_params(options.generator_params);

    std::cerr << "=================================================================" << std::endl;

    std::cerr << "Starting with " << options.engine->name() << ", ";
    std::cerr << options.workers << " worker(s), " << options.generators << " generator(s)" << std::endl;

    auto& gparams = options.generator_params;
    std::cerr << "on device " << options.engine->device_name(gparams.device) << std::endl;

    std::cerr << "block size " << gparams.block_size << ", number of blocks " << gparams.n_blocks << " ";

    std::cerr << "(batch size " << (gparams.block_size * gparams.n_blocks) << ")" << std::endl;
    std::cerr << "invert batch size " << gparams.inv_batch_size << std::endl;

    std::cerr << "=================================================================" << std::endl;

    // Wake up at most once a minute, or sooner if the time limit is shorter.
    uint64_t sleep_seconds = 60;
    if(options.time) sleep_seconds = std::min(options.time, sleep_seconds);

    const auto start_ts = clock_t::now();

    Manager mgr(options);

    for(;;) {
        std::this_thread::sleep_for(std::chrono::seconds(sleep_seconds));
        if(options.time) {
            const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(clock_t::now() - start_ts);
            // steady_clock never goes backwards, so the count is non-negative.
            const auto elapsed_seconds = static_cast<uint64_t>(elapsed.count());

            // ">=": once the limit is reached we are done. With ">" the
            // remaining time below would be 0 and the loop would spin on
            // zero-length sleeps until the next full second had passed.
            if(elapsed_seconds >= options.time) return;

            // Never sleep past the deadline; the remainder is at least 1 here.
            sleep_seconds = std::min<uint64_t>(sleep_seconds, options.time - elapsed_seconds);
        }
    }
}

int main(int argc, char **argv)
{
    std::vector<Engine*> g_engines;

#   ifdef CUDA_ENGINE
    g_engines.push_back(&cuda::get_engine());
#   endif

#   ifdef OPENCL_ENGINE
    g_engines.push_back(&opencl::get_engine());
#   endif

    bool info{false};
    std::string engine;
    std::string engines;

    if(g_engines.empty()) {
        engines = "none";
        engine = "none";
    } else {
        engine = g_engines.front()->name();
        for(auto& e : g_engines) {
            if(!engines.empty()) engines += ", ";
            engines += e->name();
        }
    }

    CLI::App app{"Yggdrasil address bruteforcer"};
    Options options;
    app.add_option("--min-hi", options.filter.min_hi,
         "Required minimal address difficulty (2nd bit's value), "
         "lower addresses will be discarded")
        ->check(CLI::Range(1, 255));
    app.add_option("--hi", options.filter.hi,
        "Desired minimal address difficulty, match is greater or equal")
        ->check(CLI::Range(1, 255));
    app.add_option("--re", options.filter.re,
        "Regex to match address string (can be specified multiple times)");
    app.add_option("-t,--time", options.time, "Number of seconds to run");
    app.add_option("--workers", options.workers, "Number of threads matching addresses");
    app.add_option("--generators", options.generators, "Number of threads generating addresses");

    app.add_option("--engine", engine, "Heterogenous computation runtime (" + engines + ")");
    app.add_option("--device", options.generator_params.device,
        "Device index");
    app.add_option("--block-size", options.generator_params.block_size,
        "Bruteforcer block size");
    app.add_option("--blocks", options.generator_params.n_blocks,
        "Number of blocks in batch");
    app.add_option("--inv-batch-size", options.generator_params.inv_batch_size,
        "Inversion batch size (advanced)");
    app.add_flag("--info", info, "Display system information");

    CLI11_PARSE(app, argc, argv);

    try {
        if(info) {
            std::cout << "Supported engines: " << engines << std::endl;
            for(auto& e : g_engines) {
                e->print_info();
            }

            return 0;
        }

        for(auto& e : g_engines) {
            if(engine == e->name()) {
                options.engine = e;
                break;
            }
        }

        if(!options.engine)
            throw std::runtime_error{"Unsupported engine: '" + engine + "'"};

        run(std::move(options));
    } catch(const std::exception& ex) {
        std::cerr << "Error: " << ex.what() << std::endl;
        return 1;
    }
    return 0;
}